

import os
import sys

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

# The first command-line argument, when supplied, is the directory under which
# the generated table is written; otherwise fall back to the default Overleaf
# location below. An explicit length check is used instead of a bare
# ``except`` so that unrelated errors and interrupts are never swallowed.
overleaf_path = (
    sys.argv[1]
    if len(sys.argv) > 1
    else './../Apps/Overleaf/bbm/draft/reports/revision/'
)

#%% IMPORT DATA -------------------------------------------------------------------------
# Load the processed contracts table: the first CSV column becomes the index
# and fixture_date is parsed as a datetime column.
df = pd.read_csv(
    './data_py/processed/contracts_final.csv',
    index_col=[0],
    parse_dates=['fixture_date'],
)

# Calendar year and month of each fixture date
fixture_dates = df['fixture_date']
df['year'] = fixture_dates.dt.year
df['month'] = fixture_dates.dt.month

# De-mean state variable controls so that rig-type indicators are interpretable
state_controls = ('g', 'n_l', 'n_m', 'n_h', 'mri')
for col in state_controls:
    df[col] = df[col].sub(df[col].mean())

#%% SET FORMULAS FOR REGRESSIONS --------------------------------------------------------
# Placeholder name -> value mapping; starts empty and is filled further down
summary_to_tex = {}

# All specifications share the same leading terms (rig-type indicators, their
# interaction with mri, and dist) and the same trailing n_l/n_m/n_h
# interactions followed by "-1"; they differ only in the controls in between.
spec_term = "C(spec, Treatment(reference='low'))"
formula_head = f"day_rate ~ {spec_term} + {spec_term}:mri + dist"
formula_tail = "+ n_l:C(spec) + n_m:C(spec) + n_h:C(spec) -1"
middle_controls = (
    "",                               # regression 0: no extra controls
    "+ gas : value + g:spec ",        # regression 1
    "+ gas : value + tau + g:spec ",  # regression 2: additionally tau
)
formulas_by_reg = {
    idx: formula_head + controls + formula_tail
    for idx, controls in enumerate(middle_controls)
}

# Map from regression coefficient names to the stem of the placeholder names
# used in the table. Insertion order: rig-type levels, their mri
# interactions, then the remaining controls.
spec_levels = ('low', 'mid', 'high')
coefs_to_tex_names = {
    **{f"{spec_term}[{level}]": level for level in spec_levels},
    **{f"{spec_term}[{level}]:mri": f"{level}_boom" for level in spec_levels},
    'gas:value': 'value',
    'tau': 'total_days_description',
    'reneg': 'reneg',
    'dist': 'dist',
}

#%% DO REGRESSIONS ----------------------------------------------------------------------
# Fit every specification by OLS with cov_type='HC0' and collect, per
# regression i, the R-squared ("r_i"), the number of observations ("n_i") and,
# for each coefficient listed in coefs_to_tex_names, the point estimate
# ("<name>_i", with significance stars appended) and its HC0 standard error
# ("se_<name>_i"). Estimates and standard errors are multiplied by 1000 and
# rounded before being stored as strings.
round_digits = 1  # decimals kept for every reported estimate / standard error

for i, formula in formulas_by_reg.items():
    reg = smf.ols(formula=formula, data=df).fit(cov_type='HC0')

    summary_to_tex[f"r_{i}"] = round(reg.rsquared, 2)
    summary_to_tex[f"n_{i}"] = int(reg.nobs)

    for k, tex_name in coefs_to_tex_names.items():
        # Not every coefficient appears in every specification. Test for
        # membership explicitly rather than wrapping the whole body in a bare
        # ``except``, which would also hide genuine errors.
        if k not in reg.params.index:
            print(f"{k} not in regression {i}")
            continue

        # Significance stars from reg.pvalues: *** below 1%, ** below 5%,
        # * below 10%. A NaN p-value fails every comparison and gets none.
        p_value = reg.pvalues[k]
        if p_value < 0.01:
            stars = "***"
        elif p_value < 0.05:
            stars = "**"
        elif p_value < 0.1:
            stars = "*"
        else:
            stars = ""

        summary_to_tex[f"{tex_name}_{i}"] = \
            str(round(1000 * reg.params[k], round_digits)) + stars
        summary_to_tex[f"se_{tex_name}_{i}"] = \
            str(round(1000 * reg.HC0_se[k], round_digits))

#%% PRODUCE THE TABLE -------------------------------------------------------------------
# Read the LaTeX template and substitute every {placeholder} with the matching
# entry of summary_to_tex. str.format raises KeyError if the template names a
# placeholder that was never filled in.
with open('./src/tex/table_price_hedonic.tex', 'r') as f:
    tex = f.read()
output = tex.format(**summary_to_tex)

# Join path components instead of concatenating strings so the destination is
# correct whether or not overleaf_path ends with a path separator.
table_path = os.path.join(overleaf_path, 'tables', 'table_price_hedonic.tex')
with open(table_path, 'w') as f:
    f.write(output)
